from transformers import BertForMaskedLM, BertTokenizer
import torch

# Load the tokenizer and model (the path may be a local checkpoint directory or a Hugging Face Hub model ID)
tokenizer = BertTokenizer.from_pretrained("PLMs/Erlangshen-TCBert-110M-Sentence-Embedding-Chinese")
model = BertForMaskedLM.from_pretrained("PLMs/Erlangshen-TCBert-110M-Sentence-Embedding-Chinese")
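# from_pretrained already returns the model in eval mode, but making that
# explicit is harmless and protects the snippet if it is adapted later.
model.eval()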

# Cosine similarity for 1-D sentence vectors (dim=0); eps guards against zero-norm vectors
cos = torch.nn.CosineSimilarity(dim=0, eps=1e-8)

with torch.no_grad():
    # Extract the sentence representation of the training sample (a classical Chinese quatrain)
    training_input = tokenizer("""沙漠真人本至尊，青蛇罢祀出梧垣。
孝陵松柏犹樵牧，元庙何妨有泪痕。""", return_tensors="pt")
    training_output = model(**training_input, output_hidden_states=True)
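    # Mean-pool the last hidden layer over all token positions (including [CLS]/[SEP]);
    # squeeze() drops the batch dimension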
    training_representation = torch.mean(training_output.hidden_states[-1].squeeze(), dim=0)

    # Extract the sentence representation of the test sample in the same way
    test_input = tokenizer("""想像承平乐事留，履綦陈迹也风流。
轻烟翠柳今何处，十六门如十六楼。""", return_tensors="pt")
    test_output = model(**test_input, output_hidden_states=True)
    test_representation = torch.mean(test_output.hidden_states[-1].squeeze(), dim=0)

# Compute the cosine similarity between the two sentence representations
similarity_score = cos(training_representation, test_representation)
print(similarity_score.item())  # a scalar in [-1, 1]; higher means more similar
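# The mean above runs over every token of a single, unpadded sentence. When
# encoding a batch of sentences with padding, [PAD] positions should be
# excluded from the mean. A minimal mask-aware pooling sketch follows; the
# helper name mean_pool and the two short example sentences are illustrative,
# not part of the model card.
def mean_pool(hidden_states, attention_mask):
    # hidden_states: (batch, seq_len, hidden); attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).type_as(hidden_states)
    summed = (hidden_states * mask).sum(dim=1)   # zero out padded positions
    counts = mask.sum(dim=1).clamp(min=1e-9)     # real tokens per sentence
    return summed / counts

with torch.no_grad():
    # Two near-paraphrases ("the weather is nice today" / "the weather is not bad today")
    batch = tokenizer(["今天天气很好。", "今天天气不错。"], padding=True, return_tensors="pt")
    batch_output = model(**batch, output_hidden_states=True)
    batch_representations = mean_pool(batch_output.hidden_states[-1], batch["attention_mask"])
    print(cos(batch_representations[0], batch_representations[1]).item())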
